###############################################################################
## AUTHOR: ALAN YAN
## DATE LAST UPDATED: 07/20/2020
## PURPOSE: ANALYZE YOUGOV CONJOINT DATA (GENERATE INCOME CHANGE BENCHMARKS)
###############################################################################
rm(list = ls())

#### todo ####
#(1) combine main corporate gov plots into one facetted plot
#(2) figure out way to get plots to appropriately order factors

#### PACKAGES ####
library(pacman)
p_load(tidyverse,
       estimatr,
       broom,
       haven,
       hrbrthemes,
       cjoint,
       formula.tools, 
       DeclareDesign,
       emmeans,
       lmtest,
       cregg,
       gridExtra,
       egg,
       texreg)

#### FUNCTIONS ####

# ggplot theme
theme_shom_alt <- function (base_size = 11, waffle = FALSE) 
{
  ret <- theme_minimal(base_size = base_size, base_family = "Roboto Condensed") + 
    theme(plot.background = element_rect(fill = "#f5f5f2", 
                                         color = NA), 
          panel.background = element_rect(fill = "#f5f5f2",color = NA), 
          legend.background = element_rect(fill = "#f5f5f2",color = NA))
  if (waffle) {
    ret + theme(axis.text = element_blank())
  }
  else {
    ret
  }
}

# lm robust function that includes omitted categories for plotting
lm_robust_omitted <- function(formula, data, clusters,weights = NULL) {
  require(formula.tools)
  require(tidyverse)
  require(haven)
  results <- lm_robust(formula = formula,
                       data = data,
                       clusters = clusters,
                       weights = weights) %>%
    tidy() 
  omitted_cat <- model.frame(formula = formula, data = data) %>% 
    select(2:length(.)) %>% 
    sapply(., levels) %>% 
    lapply(., "[[", 1) %>% 
    unlist() %>%
    as.data.frame()
  omitted_cat_df <- cbind(paste0(rownames(omitted_cat), omitted_cat[,1]), 
                          0, 0, 0, 0, 0, 0, 0, 
                          lhs.vars(formula)) %>%
    as.data.frame() %>%
    zap_labels() %>%
    setNames(., names(results))
  results_with_omitted <- rbind(results, 
                                omitted_cat_df)
  results_with_omitted[,c(2:8)] <- sapply(results_with_omitted[,c(2:8)],as.numeric)
  return(results_with_omitted)
}

#### LOAD DATA ####
df <- read_rds("01-yougov-conjoint/01-data/cleaned/yougov-conjoint-cleaned.rds")
df %>%
  mutate(cje_income_continuous = as.numeric(as.factor(cje_income))) -> df

#### PLOT MARGINAL MEAN OF INCOME (FIGURE S3) ####

df %>%
  group_by(cje_income_continuous) %>%
  summarize(prob_work = mean(work_binary)) %>%
  mutate(cje_income_actual = seq(30000,300000,10000)) %>%
  ungroup %>%
  ggplot(aes(x = cje_income_actual,y = prob_work)) + 
  geom_smooth(method = "lm") + 
  geom_point(shape = 21,size = 3,fill = "white") + 
  theme_shom_alt(base_size = 12) + 
  theme(axis.text.x = element_text(angle = 45)) + 
  scale_x_continuous(limits = c(30000,300000),
                     breaks = seq(30000,300000,10000)) + 
  labs(x = "Salary",
       y = "Probability Choose Firm",
       title = "Relationship between Salary Offer and Probability Respondent Chooses to Work at Firm",
       subtitle = "The likelihood of choosing a firm increases linearly with salary.",
       caption = "Notes: Estimates generated by computing the binned mean within each salary group.") + 
  ggsave("01-yougov-conjoint/03-src/output/figures/income-effect-conditional-expectation.pdf",
         dpi = 320,width = 10, height = 6,device = cairo_pdf)
  

#### CHECK MODEL (WORK) ####
income_model_linear_work <- lm_robust(work_binary ~ cje_income_continuous,
                        data = df,
                        clusters = df$rid
) 
summary(income_model_linear_work)

income_model_log_work <- lm_robust(work_binary ~ log(cje_income_continuous),
                                 data = df,
                                 clusters = df$rid
) 
summary(income_model_log_work)

#### CHECK MODEL (power) ####
income_model_linear_power <- lm_robust(power_binary ~ cje_income_continuous,
                                      data = df,
                                      clusters = df$rid
) 
summary(income_model_linear_power)

income_model_log_power <- lm_robust(power_binary ~ log(cje_income_continuous),
                                   data = df,
                                   clusters = df$rid
) 
summary(income_model_log_power)

#### CHECK MODEL (responsbiliites) ####
income_model_linear_resp <- lm_robust(responsibility_binary ~ cje_income_continuous,
                                       data = df,
                                       clusters = df$rid
) 
summary(income_model_linear_resp)

income_model_log_resp <- lm_robust(responsibility_binary ~ log(cje_income_continuous),
                                    data = df,
                                    clusters = df$rid
) 
summary(income_model_log_resp)

#### CHECK MODEL (complaint) ####
income_model_linear_complaint <- lm_robust(complaints_binary ~ cje_income_continuous,
                                      data = df,
                                      clusters = df$rid
) 
summary(income_model_linear_complaint)

income_model_log_complaint <- lm_robust(complaints_binary ~ log(cje_income_continuous),
                                   data = df,
                                   clusters = df$rid
) 
summary(income_model_log_complaint)

#### OUTPUT TABLE (TABLE S1) ####
texreg(list(income_model_linear_work,income_model_log_work,
       income_model_linear_power,income_model_log_power,
       income_model_linear_resp,income_model_log_resp,
       income_model_linear_complaint,income_model_log_complaint),
       custom.coef.names = c("Intercept","Salary","Log(Salary)"),
       stars = c(0.01,0.05,0.1),
       label = "tab:linregincome",
       custom.model.names = rep(c("Work","Power","Responsibilities","Complaints"),each = 2),
       custom.note = "%stars Column titles correspond to each respective dependent variable.",
       file = "01-yougov-conjoint/03-src/output/tables/linreg-estimates-income.tex"
)

